# Alex Gazmararian
# agazmararian@gmail.com
source("analysis/visibility/analysis_config.R")

library(weights)
library(scales)

g <- readRDS(here("data", "output", "visibility_analysis.rds"))

# Subset to surveys with credit claiming
g <- g %>%
  filter(sample == "Spring2024Finance" | sample == "Summer2024CBAM")
summary(g$start_date)

# Remaining variable set up----

# Variables now created in merge.R script
# Check that derived variables are available
summary(g$credit_all)
summary(g$credit_none)
summary(g$credit_extreme)

# Construct validity----
message("================================================")
message("Construct validity")
message("================================================")

# Differential response time by partisanship?
message("Differential response time by partisanship.\nRegression outcome: Time spent on credit question.")

time.file <- here("output", "pnas", "tables", "tab_S4_credit_time.tex")

time.cap <- "Linear regression model of credit attribution question time latency \\label{tab:credit_time}"

time.notes <- paste(
  "\\textit{Notes:} ",
  "Unit of analysis is the individual survey respondent.",
  "Dependent variable: time (seconds) spent on credit attribution survey question.",
  "Estimates are OLS with heteroskedasticity-robust standard errors in parentheses.",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$.",
  sep = " "
)

m.time <- list()
m.time[[1]] <- feols(credit_page_submit ~ pid3, data = g, vcov = "hetero")
m.time[[2]] <- feols(credit_page_submit ~ pid3 + sample, data = g, vcov = "hetero")
m.time[[3]] <- feols(credit_page_submit ~ ideo3, data = g, vcov = "hetero")
m.time[[4]] <- feols(credit_page_submit ~ ideo3 + sample, data = g, vcov = "hetero")

gm.latency <- gm
gm.latency$omit[gm.latency$raw == "F"] <- FALSE
gm.latency$omit[gm.latency$raw == "adj.r.squared"] <- TRUE

modelsummary(
  m.time,
    stars = c("*"=.05,"**"=.01,"***"=.001),
    gof_map = gm.latency,
    title = time.cap,
    coef_map = c(
      coefmap,
      "ideo3Conservative" = "Ideology: Conservative",
      "ideo3Not sure" = "Ideology: Not sure",
      "ideo3Liberal" = "Ideology: Liberal"
    ),
    notes = time.notes,
    add_rows = data.frame(
      c("Sample Fixed Effects"),
      c("No"),
      c("Yes"),
      c("No"),
      c("Yes")
    ),
    output = "tinytable",
    escape = FALSE,
    fmt = fmt_significant(2)
  ) %>%
  theme_latex(
    placement = "H"
  ) %>%
  save_tt(time.file, overwrite = TRUE)



# Check how many people say all actors are respnonsible

# Examine the validity of the measure
## How many people say that all political actors are responsible?
message("How many people say that all political actors are extremely responsible?")
summary(g$credit_extreme)

## How many people say that no political actors are responsible?
message("How many people say that no political actors are responsible?")
summary(g$credit_none)

# Calculate summary statistics----

# Biden responsibility
message("Biden responsibility")
biden.mean <- round(weighted.mean(g$credit_biden_bin, g$wt_credit_trim) * 100, 0)
print(paste0("Weighted mean of people saying President Biden is responsible: ", biden.mean, "%"))
cat(as.character(biden.mean), file = here("output", "pnas", "stats", "biden_responsibility.txt"))

# Governor responsibility
message("Governor responsibility")
gov.mean <- round(weighted.mean(g$credit_gov_bin, g$wt_credit_trim) * 100, 0)
print(paste0("Weighted mean of people saying Governor is responsible: ", gov.mean, "%"))
cat(as.character(gov.mean), file = here("output", "pnas", "stats", "gov_responsibility.txt"))

# State responsibility
message("State responsibility")
state.mean <- round(weighted.mean(g$credit_state_bin, g$wt_credit_trim) * 100, 0)
print(paste0("Weighted mean of people saying State is responsible: ", state.mean, "%"))
cat(as.character(state.mean), file = here("output", "pnas", "stats", "state_responsibility.txt"))

# Congressional responsibility
message("Congressional responsibility")
cong.mean <- round(weighted.mean(g$credit_cong_bin, g$wt_credit_trim) * 100, 0)
print(paste0("Weighted mean of people saying Congress is responsible: ", cong.mean, "%"))
cat(as.character(cong.mean), file = here("output", "pnas", "stats", "cong_responsibility.txt"))

# Local government responsibility
message("Local government responsibility")
com.mean <- round(weighted.mean(g$credit_com_bin, g$wt_credit_trim) * 100, 0)
print(paste0("Weighted mean of people saying Local government is responsible: ", com.mean, "%"))
cat(as.character(com.mean), file = here("output", "pnas", "stats", "com_responsibility.txt"))

# Market forces responsibility
message("Market forces responsibility")
market.mean <- round(weighted.mean(g$credit_market_bin, g$wt_credit_trim) * 100, 0)
print(paste0("Weighted mean of people saying Market forces are responsible: ", market.mean, "%"))
cat(as.character(market.mean), file = here("output", "pnas", "stats", "market_responsibility.txt"))

# Weighted t-test for difference in means
message("Weighted t-test between Biden and Governor responsibility")
t.diff <- wtd.t.test(g$credit_biden_bin, g$credit_gov_bin, weight = g$wt_credit_trim)
ti.diff.out <- round(abs(t.diff$additional["Difference"][[1]])*100, 1)
ti.diff.out
cat(as.character(ti.diff.out), file = here("output", "pnas", "stats", "biden_gov_responsibility_diff.txt"))

# Descriptives by partisanship
message("Credit for Biden by...")
dem.gov.mean <- round(weighted.mean(g$credit_gov_bin[g$pid3 == "Democrat"], g$wt_credit_trim[g$pid3 == "Democrat"]) * 100, 0)
dem.gov.mean
print(paste0("Weighted mean of people saying Governor is responsible for Democratic respondents: ", dem.gov.mean, "%"))
cat(as.character(dem.gov.mean), file = here("output", "pnas", "stats", "dem_gov_responsibility.txt"))

rep.gov.mean <- round(weighted.mean(g$credit_gov_bin[g$pid3 == "Republican"], g$wt_credit_trim[g$pid3 == "Republican"]) * 100, 0)
print(paste0("Weighted mean of people saying Governor is responsible for Republican respondents: ", rep.gov.mean, "%"))
cat(as.character(rep.gov.mean), file = here("output", "pnas", "stats", "rep_gov_responsibility.txt"))

ind.gov.mean <- round(weighted.mean(g$credit_gov_bin[g$pid3 == "Neither"], g$wt_credit_trim[g$pid3 == "Neither"]) * 100, 0)
print(paste0("Weighted mean of people saying Governor is responsible for Independent respondents: ", ind.gov.mean, "%"))
cat(as.character(ind.gov.mean), file = here("output", "pnas", "stats", "ind_gov_responsibility.txt"))


message("...Independent respondents:")
with(subset(g, pid3 == "Neither"), weighted.mean(credit_biden_bin, wt_credit_trim))

message("Credit for Governor by...")
message("...Democratic respondents:")
with(subset(g, pid3 == "Democrat"), weighted.mean(credit_gov_bin, wt_credit_trim))
message("...Republican respondents:")
with(subset(g, pid3 == "Republican"), weighted.mean(credit_gov_bin, wt_credit_trim))
message("...Independent respondents:")
with(subset(g, pid3 == "Neither"), weighted.mean(credit_gov_bin, wt_credit_trim))


# Create plots----
p.all <- g %>%
  pivot_longer(cols = c(credit_biden_bin,credit_cong_bin,
                        credit_gov_bin,credit_state_bin,
                        credit_com_bin,credit_market_bin)) %>%
  mutate(
    name = case_when(
      name == "credit_biden_bin" ~ "President Biden",
      name == "credit_cong_bin" ~ "Congress",
      name == "credit_gov_bin" ~ "Governor",
      name == "credit_state_bin" ~ "State Legislature",
      name == "credit_com_bin" ~ "Local Government",
      name == "credit_market_bin" ~ "Market Forces"
    )
  ) %>%
  filter(!is.na(value)) %>%
  group_by(name) %>%
  summarize(prop = weighted.mean(value, wt_all_trim)) %>%
  arrange(desc(prop)) %>%
  mutate(name = factor(name, levels = name)) %>%
  ggplot(aes(x = name, y = prop, label = paste0(round(prop * 100, 0), "%"))) +
  geom_col(position = "dodge") +
  geom_text(aes(label = paste0(round(prop * 100, 0), "%")), vjust = -0.5, size = plot_text_size) +
  theme_classic(base_size = 10) +
  scale_y_continuous(expand = c(0,0), limits = c(0, .71), labels = scales::percent, breaks = seq(0, .7, .1)) +
  labs(
    x = NULL,
    y = "% respondents (weighted)",
    title = "Overall"
  ) +
  theme(
    panel.grid = element_blank(),
    legend.text = element_text(size = 8),
    axis.ticks = element_blank()
  ) + 
  scale_x_discrete(labels = label_wrap(12))

# Diagnostic figure (not in paper)
if (isFALSE(pnas)) {
  save_pnas_pdf(p.all, here("output", "pnas", "figures", "fig_credit_overall.pdf"))
}

# Partisanship breakdown
p.pid <- g %>%
  pivot_longer(cols = c(credit_biden_bin,credit_cong_bin,
                        credit_gov_bin,credit_state_bin,
                        credit_com_bin,credit_market_bin)) %>%
  mutate(
    name = case_when(
      name == "credit_biden_bin" ~ "President Biden",
      name == "credit_cong_bin" ~ "Congress",
      name == "credit_gov_bin" ~ "Governor",
      name == "credit_state_bin" ~ "State Legislature",
      name == "credit_com_bin" ~ "Local Government",
      name == "credit_market_bin" ~ "Market Forces"
    )
  ) %>%
  filter(!is.na(value)) %>%
  group_by(name, pid3) %>%
  summarize(prop = weighted.mean(value, wt_all_trim)) %>%
  mutate(name = factor(name, levels = c("Governor", "State Legislature",  "Local Government", "President Biden","Congress", "Market Forces"))) %>%
  ggplot(aes(x = name, y = prop, label = paste0(round(prop * 100, 0), "%"), group = pid3)) +
  geom_col(position = "dodge", aes(fill = pid3)) +
  geom_text(aes(label = paste0(round(prop * 100, 0), "%")), vjust = -0.5, size = plot_text_size, position = position_dodge(width = 0.9)) +
  theme_classic(base_size = 10) +
  scale_y_continuous(expand = c(0,0), limits = c(0, .71), labels = scales::percent, breaks = seq(0, .7, .1)) +
  labs(
    x = NULL,
    y = "% respondents (weighted)",
    title = "By respondent partisanship",
    fill = ""
  ) +
  scale_fill_party() +
  theme(
    panel.grid = element_blank(),
    legend.position = "inside",
    legend.position.inside = c(.8, .9),
    legend.box.background = element_blank(),
    legend.background = element_blank(),
    legend.key.size = unit(.9, "lines"),
    axis.ticks = element_blank()
  ) +
  scale_x_discrete(labels = label_wrap(12))

# Diagnostic figure (not in paper)
if (isFALSE(pnas)) {
  save_pnas_pdf(p.pid, here("output", "pnas", "figures", "fig_credit_pid.pdf"))
}

# Breakdown by visibility
p.credit.vis <- g %>%
  pivot_longer(cols = c(credit_biden_bin,credit_cong_bin,
                        credit_gov_bin,credit_state_bin,
                        credit_com_bin,credit_market_bin)) %>%
  mutate(
    name = case_when(
      name == "credit_biden_bin" ~ "President Biden",
      name == "credit_cong_bin" ~ "Congress",
      name == "credit_gov_bin" ~ "Governor",
      name == "credit_state_bin" ~ "State Legislature",
      name == "credit_com_bin" ~ "Local Government",
      name == "credit_market_bin" ~ "Market Forces"
    )
  ) %>%
  filter(!is.na(value)) %>%
  group_by(name, greenproj_bin) %>%
  summarize(prop = weighted.mean(value, wt_all_trim)) %>%
  mutate(pid3 = case_when(
    greenproj_bin == 0 ~ "Not Visible",
    greenproj_bin == 1 ~ "Visible"
  )) %>%
  mutate(name = factor(name, levels = c("Governor", "State Legislature",  "Local Government", "President Biden","Congress", "Market Forces"))) %>%
  ggplot(aes(x = name, y = prop, label = paste0(round(prop * 100, 0), "%"), group = pid3)) +
  geom_col(position = "dodge", aes(fill = pid3)) +
  geom_text(aes(label = paste0(round(prop * 100, 0), "%")), vjust = -0.5, size = plot_text_size, position = position_dodge(width = 0.9)) +
  theme_classic(base_size = 10) +
  scale_y_continuous(expand = c(0,0), limits = c(0, .71), labels = scales::percent, breaks = seq(0, .7, .1)) +
  labs(
    x = NULL,
    y = "% respondents (weighted)",
    title = "By visibility of green projects",
    fill = ""
  ) +
  scale_fill_greenproj() +
  theme(
    panel.grid = element_blank(),
    legend.position = "inside",
    legend.position.inside = c(.85, .925),
    legend.box.background = element_blank(),
    legend.background = element_blank(),
    legend.key.size = unit(.9, "lines"),
    axis.ticks = element_blank()
  ) +
  scale_x_discrete(labels = label_wrap(12))

# Diagnostic figure (not in paper)
if (isFALSE(pnas)) {
  save_pnas_pdf(p.credit.vis, here("output", "pnas", "figures", "fig_credit_vis.pdf"))
}

# By governor party
p.gov <- g %>%
  pivot_longer(cols = c(credit_biden_bin,credit_cong_bin,
                        credit_gov_bin,credit_state_bin,
                        credit_com_bin,credit_market_bin)) %>%
  mutate(
    name = case_when(
      name == "credit_biden_bin" ~ "President Biden",
      name == "credit_cong_bin" ~ "Congress",
      name == "credit_gov_bin" ~ "Governor",
      name == "credit_state_bin" ~ "State Legislature",
      name == "credit_com_bin" ~ "Local Government",
      name == "credit_market_bin" ~ "Market Forces"
    )
  ) %>%
  filter(!is.na(value)) %>%
  group_by(name, govparty) %>%
  summarize(prop = weighted.mean(value, wt_all_trim)) %>%
  mutate(pid3 = case_when(
    govparty == "R" ~ "Republican",
    T ~ "Democrat"
  )) %>%
  mutate(name = factor(name, levels = c("Governor", "State Legislature",  "Local Government", "President Biden","Congress", "Market Forces"))) %>%
  ggplot(aes(x = name, y = prop, label = paste0(round(prop * 100, 0), "%"), group = pid3)) +
  geom_col(position = "dodge", aes(fill = pid3)) +
  geom_text(aes(label = paste0(round(prop * 100, 0), "%")), vjust = -0.5, size = plot_text_size, position = position_dodge(width = 0.9)) +
  theme_classic(base_size = 10) +
  scale_y_continuous(expand = c(0,0), limits = c(0, .71), labels = scales::percent, breaks = seq(0, .7, .1)) +
  scale_x_discrete(labels = label_wrap(12)) +
  labs(
    x = NULL,
    y = "% respondents (weighted)",
    title = "By governor party",
    fill = ""
  ) +
  scale_fill_manual(values = c("Democrat" = party_colors$Democrat, "Republican" = party_colors$Republican)) +
  theme(
    panel.grid = element_blank(),
    legend.position = "inside",
    legend.position.inside = c(.85, .925),
    legend.box.background = element_blank(),
    legend.background = element_blank(),
    legend.key.size = unit(.9, "lines"),
    axis.ticks = element_blank()
  )

p.credit.out <- p.all + p.pid + p.credit.vis + p.gov +
  plot_layout(ncol = 2) +
  plot_annotation(tag_levels = "A")

save_pnas_pdf(p.credit.out, here("output", "pnas", "figures", "fig_3_credit_claiming_survey.pdf"), width = "double", height_cm = 10)

# Estimate linear probability models of credit attribution----
credit.outcomes <- c("credit_biden_bin", "credit_cong_bin", "credit_gov_bin", "credit_state_bin", "credit_com_bin", "credit_market_bin")
names(credit.outcomes) <- c("Biden", "Congress", "Governor", "State", "Local", "Markets")

m.credit <- lapply(
  credit.outcomes,
  function(x) {est_model(outcome = x, data.in = g, se = "state", fe = NULL, varname = "govparty*pid3+union_mem_z+eprice_z+greenproj_bin")}
  )
names(m.credit) <- names(credit.outcomes)

tab.credit <- here("output", "pnas", "tables", "tab_S28_credit_attribution.tex")

credit.cap <- paste0("Linear probability models of credit attribution \\label{tab:credit_attribution}")

credit.notes <- paste(
  "\\textit{Notes:} Each column reports a separate linear probability model.",
  "Unit of analysis is the individual survey respondent.",
  "Dependent variable = 1 if respondent credits the column header actor for local green investments, 0 otherwise.",
  "Continuous covariates are standardized: county-level variables use within-state standardization;",
  "state-level variables use full-sample z-scores; individual-level indices are standardized by construction.",
  "Estimates are OLS with cluster-robust standard errors by state in parentheses.",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$.",
  sep = " "
)

modelsummary(
  m.credit,
  stars = c("*"=.05,"**"=.01,"***"=.001),
  coef_map = coefmap,
  add_rows = data.frame(
    c("Sample Fixed Effects"),
    c("Yes"),
    c("Yes"),
    c("Yes"),
    c("Yes"),
    c("Yes"),
    c("Yes")
  ),
  fmt = fmt_significant(2),
  gof_map = gm,
  gof_omit = "IC|RMSE|Std|FE|Within",
  output = "tinytable",
  escape = FALSE,
  title = credit.cap,
  notes = credit.notes
) %>%
  group_tt(
    j = list("Credit Recipient:" = 2:7)
  ) %>%
  theme_latex(resize_width = 0.52, resize_direction = "both", placement = "H") %>%
  save_tt(tab.credit, overwrite = TRUE)

# Within subject comparison of governor versus Biden----

within.outcomes <- c("biden_vs_gov", "biden_vs_state")
names(within.outcomes) <- c("Governor", "State")

m.within <- lapply(
  within.outcomes,
  function(x) {est_model(outcome = x, data.in = g, se = "state", fe = NULL, varname = "govparty*pid3+union_mem+eprice+greenproj_bin")}
)
names(m.within) <- names(within.outcomes)

tab.credit.within <- here("output", "pnas", "tables", "tab_S29_credit_claiming_within.tex")

credit.within.cap <- paste0("Linear probability models of within-subject credit attribution \\label{tab:credit_claiming_within}")

credit.within.notes <- paste(
  "\\textit{Notes:} Each column reports a separate linear probability model.",
  "Unit of analysis is the individual survey respondent.",
  "Dependent variable = 1 if the respondent credits the column header actor but does not credit President Biden, 0 otherwise.",
  "Continuous covariates are standardized: county-level variables use within-state standardization;",
  "state-level variables use full-sample z-scores.",
  "Estimates are OLS with cluster-robust standard errors by state in parentheses.",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$.",
  sep = " "
)

modelsummary(
  m.within,
  stars = c("*"=.05, "**"=.01, "***"=.001),
  coef_map = coefmap,
  add_rows = data.frame(
    c("Sample Fixed Effects"),
    c("Yes"),
    c("Yes")
  ),
  fmt = fmt_significant(2),
  gof_map = gm,
  gof_omit = "IC|RMSE|Std|FE|Within",
  output = "tinytable",
  escape = FALSE,
  title = credit.within.cap,
  notes = credit.within.notes
) %>%
  group_tt(
    j = list("Credit Biden but not the..." = 2:3)
  ) %>%
  theme_latex(resize_width = .34, resize_direction = "both", placement = "H") %>%
  save_tt(tab.credit.within, overwrite = TRUE)